A demo of the GAA submission evaluation package

# Load the GAA-EVAL evaluation functions (defines read.RealData,
# read.Submission.Folder, the eval.* scorers, and the plot.* helpers used
# throughout this demo). The "##" lines below are the console messages
# recorded when the script was sourced (package startup/attach output).
source("GAA-EVAL.R")
## Warning: package 'ggplot2' was built under R version 3.4.4
## Warning: package 'dplyr' was built under R version 3.4.4
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
## Warning: package 'pROC' was built under R version 3.4.4
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
## Loading required package: reshape2
## Warning: package 'reshape2' was built under R version 3.4.3
## Warning: package 'ggrepel' was built under R version 3.4.4
## Warning: package 'gmodels' was built under R version 3.4.4
## 
## Attaching package: 'gmodels'
## The following object is masked from 'package:pROC':
## 
##     ci

Read in the experimental data provided by CAGI

# Read the CAGI experimental data: column 1 = variant id, column 2 = measured
# value, column 3 = standard deviation. Returns a list whose $value component
# is a vector of measurements named by variant (see head() output below).
exp.data <- read.RealData(file = "exp_data.txt", sep = "\t",
                             col.id = 1, col.value = 2, col.sd = 3)
# Sanity check: first few experimental values, keyed by variant id.
head(exp.data$value)
##  A10G A129V A131S A150T A150W A152D 
##  3523   564  4051  1850  3325   451

Read in the submission folders

# Read every prediction file found under "prediction/"; the columns mirror the
# experimental file (id / value / sd), and real.data is used to align variants.
# The file paths below were printed while the folder was read.
sub.data <- read.Submission.Folder(folder.name = "prediction/", col.id = 1,
                                   col.value = 2, col.sd = 3, real.data = exp.data)
## [1] "prediction//Group_39/Group_39-prediction_file-1.txt"
## [1] "prediction//Group_39/Group_39-prediction_file-2.txt"
## [1] "prediction//Group_40/Group_40-prediction_file-1.txt"
## [1] "prediction//Group_41/Group_41-prediction_file-1.txt"
## [1] "prediction//Group_41/Group_41-prediction_file-2.txt"
## [1] "prediction//Group_42/Group_42-prediction_file-1.txt"
## [1] "prediction//Group_42/Group_42-prediction_file-2.txt"
## [1] "prediction//Group_43/Group_43-prediction_file-1.txt"
## [1] "prediction//Group_44/Group_44-prediction_file-1.txt"
## [1] "prediction//Group_44/Group_44-prediction_file-2.txt"
## [1] "prediction//Group_44/Group_44-prediction_file-3.txt"
## [1] "prediction//Group_44/Group_44-prediction_file-4.txt"
## [1] "prediction//Group_45/Group_45-prediction_file-1.txt"
## [1] "prediction//Group_46/Group_46-prediction_file-1-late.txt"
## [1] "prediction//Group_47/Group_47-prediction_file-1.txt"
## [1] "prediction//Group_47/Group_47-prediction_file-2.txt"
# Tag each of the 16 submission files with its group id (9 groups in total),
# in the same order as the files listed above. Use `<-` for assignment,
# per R style convention.
sub.data <- addGroup(sub.data, c(1, 1, 2, 3, 3, 4, 4, 5, 6, 6, 6, 6, 7, 8, 9, 9))

ScatterPlot inspection

# Visual inspection: scatter plot of each submission's predictions against the
# experimental values, after z-transforming both. The warnings record rows
# dropped for missing predictions (variants a submission did not cover).
plot_all_scatter(real.data = exp.data, pred.data = sub.data, z.transform = TRUE)
## Warning: Removed 4890 rows containing missing values (geom_point).

## Warning: Removed 4890 rows containing missing values (geom_point).

## Warning: Removed 4890 rows containing missing values (geom_point).
## Warning: Removed 4427 rows containing missing values (geom_point).

Correlation-based Evaluation

# 1. Compute the Pearson correlation between each submission and the
#    experimental values (z-transformed). sd.use = NA ignores the reported
#    standard deviations when scoring.
result.cor.pearson <- eval.Correlation(real.data = exp.data, pred.data = sub.data,
                                       method = "pearson", sd.use = NA, z.transform = TRUE)
head(result.cor.pearson)
##                            pearson.coefficient.n=5109      p.value
## Group_39-prediction_file-1                  0.2205440 2.551258e-57
## Group_39-prediction_file-2                  0.2142851 3.864431e-54
## Group_40-prediction_file-1                  0.3118278 2.525095e-06
## Group_41-prediction_file-1                  0.2138742 6.199527e-54
## Group_41-prediction_file-2                  0.2220716 4.124447e-58
## Group_42-prediction_file-1                  0.1532161 2.334007e-02
# 2. Plot the correlation coefficients per submission.
plot.Correlation(result.cor.pearson, "Pearson")

RMSD-based Evaluation

# RMSD between predictions and experiment using the density-distance variant,
# with variance normalization (values are on a normalized, unitless scale).
result.rmsd <- eval.RMSD(real.data = exp.data, pred.data = sub.data, sd.use = NA,
                         density.distance = TRUE, density.distance.adjust = FALSE,
                         variance.normalization = TRUE)
head(result.rmsd)
##                                    RMSD
## Group_39-prediction_file-1 0.0017074087
## Group_39-prediction_file-2 0.0017074330
## Group_40-prediction_file-1 0.0003547050
## Group_41-prediction_file-1 0.0017073238
## Group_41-prediction_file-2 0.0017073365
## Group_42-prediction_file-1 0.0003547284
plot.RMSD(result.rmsd, method = "")

# Without variance normalization: RMSD is reported on the raw measurement
# scale. (The original comment said "with variance.normalization", but the
# call below sets variance.normalization = FALSE.)
result.rmsd <- eval.RMSD(real.data = exp.data, pred.data = sub.data, sd.use = NA,
                         density.distance = TRUE, density.distance.adjust = FALSE,
                         variance.normalization = FALSE)
head(result.rmsd)
##                                 RMSD
## Group_39-prediction_file-1 3450.4299
## Group_39-prediction_file-2 3450.4789
## Group_40-prediction_file-1  716.8082
## Group_41-prediction_file-1 3450.2580
## Group_41-prediction_file-2 3450.2837
## Group_42-prediction_file-1  716.8556
plot.RMSD(result.rmsd, method = "")

Cut-off-based Evaluation

# Binarize the (z-transformed) experimental values at threshold 0.4 and score
# each submission as a classifier: AUC plus confusion-matrix statistics.
# Use TRUE rather than T, which is an ordinary variable and can be reassigned.
result.auc.0.4 <- eval.AUC(real.data = exp.data, pred.data = sub.data,
                           threshold = 0.4, z.transform = TRUE)
## [1] "Group_39-prediction_file-1"
## [1] "Group_39-prediction_file-2"
## [1] "Group_40-prediction_file-1"
## [1] "Group_41-prediction_file-1"
## [1] "Group_41-prediction_file-2"
## [1] "Group_42-prediction_file-1"
## [1] "Group_42-prediction_file-2"
## [1] "Group_43-prediction_file-1"
## [1] "Group_44-prediction_file-1"
## [1] "Group_44-prediction_file-2"
## [1] "Group_44-prediction_file-3"
## [1] "Group_44-prediction_file-4"
## [1] "Group_45-prediction_file-1"
## [1] "Group_46-prediction_file-1-late"
## [1] "Group_47-prediction_file-1"
## [1] "Group_47-prediction_file-2"
head(result.auc.0.4$results)
##                            AUC.0.4 sensitivity specificity accuracy
## Group_39-prediction_file-1  0.6206      0.3318      0.8432   0.6408
## Group_39-prediction_file-2  0.6070      0.2982      0.8827   0.6514
## Group_40-prediction_file-1  0.7330      0.3840      0.8617   0.5890
## Group_41-prediction_file-1  0.6370      0.5084      0.6955   0.6215
## Group_41-prediction_file-2  0.6427      0.5114      0.6945   0.6220
## Group_42-prediction_file-1  0.6091      0.3680      0.7447   0.5297
##                            precision f1_score  bAccu
## Group_39-prediction_file-1    0.5810   0.4224 0.5875
## Group_39-prediction_file-2    0.6249   0.4037 0.5905
## Group_40-prediction_file-1    0.7869   0.5161 0.6229
## Group_41-prediction_file-1    0.5224   0.5153 0.6020
## Group_41-prediction_file-2    0.5230   0.5171 0.6030
## Group_42-prediction_file-1    0.6571   0.4718 0.5563
plot.AUC(result.auc.0.4)

Between-method Evaluation

# Between-method evaluation: Spearman correlation of every submission file
# against every other submission file.
result.bM.spearman <- eval.Correlation.Between(real.data = exp.data, pred.data = sub.data,
                                               method = "spearman", sd.use = NA,
                                               z.transform = TRUE)
plot.Correlation.Between(result.bM.spearman$coefficient, method="Spearman")

# Repeat using only the best submission of each group (grouped = TRUE).
# NOTE(review): this run uses method = "pearson" but reuses the
# `result.bM.spearman` name. Kept as-is so later code referencing the
# variable still works, but the name is misleading — consider renaming.
result.bM.spearman <- eval.Correlation.Between(real.data = exp.data, pred.data = sub.data,
                                               method = "pearson", sd.use = NA,
                                               z.transform = TRUE, grouped = TRUE)
## Warning: package 'bindrcpp' was built under R version 3.4.4
plot.Correlation.Between(result.bM.spearman$coefficient, method="pearson")

Partial-Correlation Evaluation

# Partial-correlation evaluation: correlation of each submission with the
# experimental data, controlling for the other submissions.
result.pCor <- eval.Partial.Correlation(real.data = exp.data, pred.data = sub.data, method = "spearman")
## Loading required package: ppcor
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 3.4.4
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Warning in pcor(mat, method = "spearman", ...): The inverse of variance-
## covariance matrix is calculated using Moore-Penrose generalized matrix
## invers due to its determinant of zero.
## Warning in sqrt((n - 2 - gp)/(1 - pcor^2)): NaNs produced
# Bug fix: the original plotted result.bM.spearman (the between-method result
# from the previous section) instead of the partial-correlation result
# computed just above.
# NOTE(review): assumes eval.Partial.Correlation returns a $coefficient
# component like eval.Correlation.Between does — confirm against GAA-EVAL.R.
plot.Correlation.Between(result.pCor$coefficient, method="Spearman")

PCA Plot

# Combine the experimental values ("real") with every submission's predictions
# into one matrix; na.omit drops variants missing from any submission before
# the PCA. Use `<-` for assignment and FALSE rather than the reassignable F.
total <- cbind(real = exp.data$value, sub.data$value)
Plot.PCA(na.omit(total), labels = FALSE, legend = TRUE)

Uniqueness Evaluation

# Uniqueness per group: the drop in adjusted r^2 between the full linear model
# (all groups) and the model with that group's predictions removed. A positive
# value means the group contributes information no other group provides.
result.uniq <- eval.uniqueness(real.data = exp.data, pred.data = sub.data)
result.uniq
##      uniqueness
## 1 -0.0033968380
## 2 -0.0022404747
## 3  0.0190299731
## 4 -0.0024418645
## 5  0.0253771290
## 6  0.0082466666
## 7 -0.0028653213
## 8 -0.0004954721
## 9 -0.0013843547
plot.uniqueness(result.uniq, method = "")

Bootstrap Analysis

For the correlation-based evaluation, report the bootstrap mean, confidence interval (CI), and median p-value

# 1. Bootstrap the Pearson correlation per submission: reports the bootstrap
#    mean, 95% CI bounds, standard deviation, and median p-value across
#    resamples. Use TRUE rather than the reassignable T.
boot.result.cor.pearson <- eval.Correlation(real.data = exp.data, pred.data = sub.data,
                                            method = "pearson", sd.use = NA,
                                            z.transform = TRUE, boot = TRUE)
head(boot.result.cor.pearson)
##                                  avg     low_ci   high_ci         sd
## Group_39-prediction_file-1 0.2207798 0.20120188 0.2412131 0.01218407
## Group_39-prediction_file-2 0.2146817 0.19630053 0.2346390 0.01185537
## Group_40-prediction_file-1 0.3088733 0.20984235 0.3985163 0.05834748
## Group_41-prediction_file-1 0.2141488 0.19221336 0.2342647 0.01326737
## Group_41-prediction_file-2 0.2222340 0.20093036 0.2428381 0.01306769
## Group_42-prediction_file-1 0.1544749 0.04063509 0.2615746 0.06693900
##                                 p.value
## Group_39-prediction_file-1 1.996242e-57
## Group_39-prediction_file-2 2.944147e-54
## Group_40-prediction_file-1 2.441409e-06
## Group_41-prediction_file-1 2.164946e-54
## Group_41-prediction_file-2 1.403092e-58
## Group_42-prediction_file-1 2.142895e-02
# 2. Plot the bootstrapped correlations (boot = TRUE adds the CI error bars).
plot.Correlation(boot.result.cor.pearson, "Pearson", boot = TRUE)

For RMSD-based Evaluation

# Bootstrap the RMSD evaluation (plain RMSD here: density-distance options
# off, variance normalization on). Spell out TRUE/FALSE instead of T/F,
# which are ordinary variables and can be reassigned.
boot.result.rmsd <- eval.RMSD(real.data = exp.data, pred.data = sub.data, sd.use = NA,
                              density.distance = FALSE, density.distance.adjust = FALSE,
                              variance.normalization = TRUE, boot = TRUE)
head(boot.result.rmsd)
##                                    RMSD       low_ci      high_ci
## Group_39-prediction_file-1 0.0014325208 0.0014046682 0.0014635107
## Group_39-prediction_file-2 0.0014325540 0.0014047012 0.0014635450
## Group_40-prediction_file-1 0.0003260579 0.0003021397 0.0003487363
## Group_41-prediction_file-1 0.0014323977 0.0014045511 0.0014633831
## Group_41-prediction_file-2 0.0014324163 0.0014045686 0.0014634023
## Group_42-prediction_file-1 0.0003260854 0.0003021686 0.0003487574
##                                      sd
## Group_39-prediction_file-1 1.752230e-05
## Group_39-prediction_file-2 1.752269e-05
## Group_40-prediction_file-1 1.370471e-05
## Group_41-prediction_file-1 1.752028e-05
## Group_41-prediction_file-2 1.752052e-05
## Group_42-prediction_file-1 1.370550e-05
plot.RMSD(boot.result.rmsd, method = "", boot = TRUE)

For Uniqueness Evaluation

# Bootstrap the uniqueness scores to attach confidence intervals and standard
# deviations to each group's adjusted-r^2 contribution. Use `<-` for
# assignment, per R style convention.
result.uniq <- eval.uniqueness(real.data = exp.data, pred.data = sub.data, boot = TRUE)
result.uniq
##      uniqueness       low_ci     high_ci          sd
## 1 -0.0005991351 -0.003638222 0.008117041 0.004208176
## 2  0.0014178271 -0.003470437 0.013948044 0.006618052
## 3  0.0222532819  0.001349078 0.057484825 0.017401726
## 4  0.0010291336 -0.003565168 0.012440378 0.005844097
## 5  0.0257885881  0.001552415 0.060673355 0.017557189
## 6  0.0096609177 -0.002808184 0.032872572 0.010947511
## 7 -0.0006321320 -0.003579500 0.007359578 0.003745998
## 8  0.0024353157 -0.003410292 0.016190723 0.006748525
## 9  0.0019408102 -0.003631670 0.016038266 0.006610507
plot.uniqueness(result.uniq, method = "", boot = TRUE)